package com.mano.web.webmagic;

import com.mano.web.domain.Journal;
import com.mano.web.domain.JournalPage;
import com.mano.web.mapper.JournalMapper;
import com.mano.web.mapper.JournalPageMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Service;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import java.util.Map;

/**
 * WebMagic {@link Pipeline} that stores crawled {@link Journal} results, together with
 * their {@link JournalPage} entries, through {@link JournalMapper} and
 * {@link JournalPageMapper}.
 *
 * <p>A journal is only inserted when {@code journalMapper.selectByIssue} finds no existing
 * row for its issue. Before insertion the journal URL and every page URL are rewritten from
 * the {@code release} form to the {@code apprelease} form, e.g.
 * {@code http://i.xpaper.net/cnsports/release/542/2069.shtml} becomes
 * {@code http://i.xpaper.net/cnsports/apprelease/542/2069.shtml}.
 *
 * @Author: zj
 * @Date: Created in 14:47 2020/9/3
 */
@Component
public class XpaperZgtcbPopeline implements Pipeline {

    private static final Logger logger = LoggerFactory.getLogger(XpaperZgtcbPopeline.class);

    @Autowired
    private JournalMapper journalMapper;

    @Autowired
    private JournalPageMapper journalPageMapper;

    /**
     * Persists every {@link Journal} found in the result items under a key containing
     * {@code "journal"}.
     *
     * <p>Entries whose key matches but whose value is {@code null} or not a {@link Journal}
     * are skipped instead of failing with a {@link ClassCastException}.
     *
     * @param resultItems extraction results produced by the page processor
     * @param task        the running crawl task (not used by this pipeline)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
            if (!entry.getKey().contains("journal")) {
                continue;
            }
            Object value = entry.getValue();
            // instanceof is false for null, so this covers both the null and wrong-type cases.
            if (!(value instanceof Journal)) {
                continue;
            }
            saveIfNew((Journal) value);
        }
    }

    /**
     * Inserts the journal and its pages unless a journal with the same issue already exists.
     *
     * @param journal the crawled journal, never {@code null}
     */
    private void saveIfNew(Journal journal) {
        Journal oldJournal = journalMapper.selectByIssue(journal.getIssue());
        if (oldJournal != null) {
            logger.info("期号为{}的期刊已经存在!", journal.getIssue());
            return;
        }

        journal.setUrl(toAppUrl(journal.getUrl()));
        journalMapper.insertSelective(journal);
        logger.info("journalMapper-insert：{}", journal);

        // A journal may arrive without pages; the journal row has already been written,
        // so simply stop here rather than throwing after a partial insert.
        if (journal.getJournalPages() == null) {
            return;
        }
        for (JournalPage journalPage : journal.getJournalPages()) {
            if (journalPage == null) {
                continue;
            }
            savePage(journal, journalPage);
        }
    }

    /**
     * Links the page to its (already inserted) journal, rewrites its URL and inserts it.
     *
     * @param journal     the owning journal; its id is copied onto the page
     * @param journalPage the page to store, never {@code null}
     */
    private void savePage(Journal journal, JournalPage journalPage) {
        journalPage.setJournalId(journal.getId());
        journalPage.setPageHtmlUrl(toAppUrl(journalPage.getPageHtmlUrl()));
        journalPageMapper.insertSelective(journalPage);
        logger.info("journalPageMapper-insert：{}", journalPage);

        // Deep crawling of each page's content used to be started here by a nested Spider
        // fed with the original (un-rewritten) page URL. That crawl is disabled in this
        // version; only the start marker below is still logged.
        logger.info("XpaperZgtcbJournalPageContentProcessor-start");
    }

    /**
     * Rewrites a {@code release} URL to its {@code apprelease} counterpart.
     *
     * <p>The negative lookbehind keeps the rewrite idempotent: a URL that already contains
     * {@code apprelease} is left as is instead of becoming {@code appapprelease}.
     *
     * @param url the crawled URL, may be {@code null}
     * @return the rewritten URL, or {@code null} when {@code url} is {@code null}
     */
    private static String toAppUrl(String url) {
        if (url == null) {
            return null;
        }
        return url.replaceAll("(?<!app)release", "apprelease");
    }
}
